******SET VARIABLES IN OUTPUT DATASET******
clear all
postfile buffer id ///
///
ts_corr_b ts_corr_se ///
///
ts_d1_b ts_d1_se ///
ts_s1_b ts_s1_se ///
ts_d2_b ts_d2_se ///
ts_s2_b ts_s2_se ///
ts_d3_b ts_d3_se ///
ts_s3_b ts_s3_se ///
ts_a3_b ts_a3_se ///
ts_d4_b ts_d4_se ///
ts_s4_b ts_s4_se ///
ts_d5_b ts_d5_se ///
ts_s5_b ts_s5_se ///
ts_d6_b ts_d6_se ///
ts_s6_b ts_s6_se ///
ts_d7_b ts_d7_se ///
ts_s7_b ts_s7_se ///
ts_a7_b ts_a7_se ///
ts_d8_b ts_d8_se ///
ts_s8_b ts_s8_se ///
///
me1_d1_b me1_d1_se ///
me1_s1_b me1_s1_se ///
me1_d2_b me1_d2_se ///
me1_s2_b me1_s2_se ///
me1_d3_b me1_d3_se ///
me1_s3_b me1_s3_se ///
me1_d4_b me1_d4_se ///
me1_s4_b me1_s4_se ///
me1_d5_b me1_d5_se ///
me1_s5_b me1_s5_se ///
me1_t6_b me1_t6_se ///
me1_t7_b me1_t7_se ///
///
me2_d2_b me2_d2_se ///
me2_s2_b me2_s2_se ///
me2_d4_b me2_d4_se ///
me2_s4_b me2_s4_se ///
///
nl_d1_b nl_d1_se ///
nl_s1_b nl_s1_se ///
nl_d2_b nl_d2_se ///
nl_s2_b nl_s2_se ///
nl_d3_b nl_d3_se ///
nl_s3_b nl_s3_se ///
nl_d4_b nl_d4_se ///
nl_s4_b nl_s4_se ///
nl_d5_b nl_d5_se ///
nl_s5_b nl_s5_se ///
nl_d6_b nl_d6_se ///
nl_s6_b nl_s6_se ///
nl_d7_b nl_d7_se ///
nl_s7_b nl_s7_se ///
///
using output, replace


******SET NUMBER OF SIMULATIONS******
forvalues repetition = 1/100 {

quietly{

******SETUP DATA******
clear
set obs 5000
gen id = _n


******GENERATE VARIABLES******
//Split observations into regions by generating regional identifiers
foreach continuous in county_linear industry_linear ///
{
	gen `continuous' = rnormal(0,1)
}
xtile county = county_linear, nquantiles(500)
xtile state = county, nquantiles(50)
xtile industry = industry_linear, nquantiles(500)


//Generate number of observations by region
bysort county: egen county_N = count(id)
gen county_N_1 = county_N - 1

bysort state: egen state_N = count(id)
gen state_N_1 = state_N - 1

bysort industry: egen industry_N = count(id)
gen industry_N_1 = industry_N - 1

bysort county: gen county_id = _n


//Generate treatment variables (true, measured with error, non-linear)
gen v_county = exp(rnormal(0,1))
bysort county (id): replace v_county = v_county[1]

gen v_state = exp(rnormal(0,1))
bysort state (id): replace v_state = v_state[1]

gen v_industry = exp(rnormal(0,1))
bysort industry (id): replace v_industry = v_industry[1]

gen instrument = rnormal(0,1)

gen xe = rnormal(0,1)
gen x0 = instrument + xe
gen x1 = v_county + instrument + xe
gen x2 = v_county + v_industry + instrument + xe
gen x3 = v_county + v_state + v_industry + instrument + xe

sum x0, det
gen uhigh0 = rnormal(0,sqrt(3/7)*`r(sd)') //STV=70%
gen umedium0 = rnormal(0,sqrt(1/9)*`r(sd)') //STV=90%
gen ulow0 = rnormal(0,sqrt(5/95)*`r(sd)') //STV=95%
gen x0errorhigh = x0 + uhigh
gen x0errormedium = x0 + umedium
gen x0errorlow = x0 + ulow
sum x0 x0errorhigh x0errormedium x0errorlow, det

sum x1, det
gen uhigh1 = rnormal(0,sqrt(3/7)*`r(sd)') //STV=70%
gen umedium1 = rnormal(0,sqrt(1/9)*`r(sd)') //STV=90%
gen ulow1 = rnormal(0,sqrt(5/95)*`r(sd)') //STV=95%
gen x1errorhigh = x1 + uhigh1
gen x1errormedium = x1 + umedium1
gen x1errorlow = x1 + ulow1
sum x1 x1errorhigh x1errormedium x1errorlow, det

gen x0_nl = 0
replace x0_nl = x0 if x0>0

gen x1_nl = 0
replace x1_nl = x1 if x1>0

gen x1_nl2 = 0
replace x1_nl2 = x1*x1 if x1>0


//Generate county-level treatment variables
foreach regressor in x0 x1 x2 x3 x0errorhigh x0errormedium x0errorlow x1errorhigh x1errormedium x1errorlow instrument x0_nl x1_nl x1_nl2 {
	bysort county: egen county_total_`regressor' = total(`regressor')
	gen county_total_`regressor'_1 = county_total_`regressor' - `regressor'
	
	gen county_mean_`regressor' = county_total_`regressor' / county_N
	gen county_mean_`regressor'_1 = county_total_`regressor'_1 / county_N_1
}


//Generate state-level treatment variables
foreach regressor in x0 x1 x2 x3 x0errorhigh x0errormedium x0errorlow x1errorhigh x1errormedium x1errorlow instrument x0_nl x1_nl x1_nl2 {
	bysort state: egen state_total_`regressor' = total(`regressor')
	gen state_total_`regressor'_1 = state_total_`regressor' - `regressor'
	
	gen state_mean_`regressor' = state_total_`regressor' / state_N
	gen state_mean_`regressor'_1 = state_total_`regressor'_1 / state_N_1
}


//Generate industry-level treatment variables
foreach regressor in x0 x1 x2 x3 x0errorhigh x0errormedium x0errorlow x1errorhigh x1errormedium x1errorlow instrument x0_nl x1_nl x1_nl2 {
	bysort industry: egen industry_total_`regressor' = total(`regressor')
	gen industry_total_`regressor'_1 = industry_total_`regressor' - `regressor'
	
	gen industry_mean_`regressor' = industry_total_`regressor' / industry_N
	gen industry_mean_`regressor'_1 = industry_total_`regressor'_1 / industry_N_1
}


//True data-generating process
gen e = rnormal(0,1)
gen y_0_spillover = x1 + e
gen y_0_spillover_nl = x1_nl + e
gen y_0_spillover_nl2 = x1_nl2 + e
gen y_0_spillover_nl_x0 = x0_nl + e

gen y_1_spillover = x1 + county_mean_x1_1 + e
gen y_1_spillover_nl = x1_nl + county_mean_x1_nl_1 + e
gen y_1_spillover_nl_x0 = x0_nl + county_mean_x0_nl_1 + e

gen y_0_spillover_x0 = x0 + e
gen y_1_spillover_x0_ind = x0 + industry_mean_x0_1 + e
gen y_1_spillover_x0_cnty = x0 + county_mean_x0_1 + e
gen y_2_spillover_x0 = x0 + county_mean_x0_1 + industry_mean_x0_1 + e

gen y_0_spillover_x2 = x2 + e
gen y_1_spillover_x2 = x2 + county_mean_x2_1 + e
gen y_2_spillover_x2 = x2 + county_mean_x2_1 + industry_mean_x2_1 + e

foreach y in y_0_spillover y_1_spillover y_0_spillover_nl y_1_spillover_nl {
	bysort county: egen county_mean_`y' = mean(`y')
	bysort industry: egen industry_mean_`y' = mean(`y')
}

sort county id



******ONE TRUE SPILLOVER******
//No correlation between two leave-out means
reg county_mean_x2_1 industry_mean_x2_1, cluster(county)
global ts_corr_b = _b[industry_mean_x2_1]
global ts_corr_se = _se[industry_mean_x2_1]

//OLS -- one true spillover -- wrong leave-out mean as regressors -- systematic variation
reg y_1_spillover_x2 x2 industry_mean_x2_1, cluster(county)
global ts_d1_b = _b[x2]
global ts_d1_se = _se[x2]
global ts_s1_b = _b[industry_mean_x2_1]
global ts_s1_se = _se[industry_mean_x2_1]

//IV -- one true spillover -- wrong leave-out mean as regressors -- systematic variation
ivreg y_1_spillover_x2 (x2 industry_mean_x2_1 = instrument industry_mean_instrument_1), cluster(county)
global ts_d2_b = _b[x2]
global ts_d2_se = _se[x2]
global ts_s2_b = _b[industry_mean_x2_1]
global ts_s2_se = _se[industry_mean_x2_1]

//OLS -- one true spillover -- two leave-out means as regressors -- systematic variation
reg y_1_spillover_x2 x2 county_mean_x2_1 industry_mean_x2_1, cluster(county)
global ts_d3_b = _b[x2]
global ts_d3_se = _se[x2]
global ts_s3_b = _b[industry_mean_x2_1]
global ts_s3_se = _se[industry_mean_x2_1]
global ts_a3_b = _b[county_mean_x2_1]
global ts_a3_se = _se[county_mean_x2_1]

//OLS -- one true spillover -- wrong leave-out mean as regressors -- random variation
reg y_1_spillover_x0_cnty x0 industry_mean_x0_1, cluster(county)
global ts_d4_b = _b[x0]
global ts_d4_se = _se[x0]
global ts_s4_b = _b[industry_mean_x0_1]
global ts_s4_se = _se[industry_mean_x0_1]



******TWO TRUE SPILLOVERS******
//OLS -- two true spillovers -- one leave-out mean as regressors -- systematic variation
reg y_2_spillover_x2 x2 industry_mean_x2_1, cluster(county)
global ts_d5_b = _b[x2]
global ts_d5_se = _se[x2]
global ts_s5_b = _b[industry_mean_x2_1]
global ts_s5_se = _se[industry_mean_x2_1]

//IV -- two true spillovers -- one leave-out mean as regressors -- systematic variation
ivreg y_2_spillover_x2 (x2 industry_mean_x2_1 = instrument industry_mean_instrument_1), cluster(county)
global ts_d6_b = _b[x2]
global ts_d6_se = _se[x2]
global ts_s6_b = _b[industry_mean_x2_1]
global ts_s6_se = _se[industry_mean_x2_1]

//OLS -- two true spillovers -- two leave-out means as regressors -- systematic variation
reg y_2_spillover_x2 x2 industry_mean_x2_1 county_mean_x2_1, cluster(county)
global ts_d7_b = _b[x2]
global ts_d7_se = _se[x2]
global ts_s7_b = _b[industry_mean_x2_1]
global ts_s7_se = _se[industry_mean_x2_1]
global ts_a7_b = _b[county_mean_x2_1]
global ts_a7_se = _se[county_mean_x2_1]

//OLS -- two true spillovers -- one leave-out mean as regressor -- random variation
reg y_2_spillover_x0 x0 industry_mean_x0_1, cluster(county)
global ts_d8_b = _b[x0]
global ts_d8_se = _se[x0]
global ts_s8_b = _b[industry_mean_x0_1]
global ts_s8_se = _se[industry_mean_x0_1]

//Not in paper OLS -- two true spillovers -- county-level -- systematic variation
ivreg y_2_spillover_x2 county_mean_x2, cluster(county)



******REGRESSION ANALYSIS FOR MEASUREMENT ERROR -- NO TRUE SPILLOVER******
//ME none -- OLS without spillovers
reg y_0_spillover x1 county_mean_x1_1, cluster(county)
global me1_d1_b = _b[x1]
global me1_d1_se = _se[x1]
global me1_s1_b = _b[county_mean_x1_1]
global me1_s1_se = _se[county_mean_x1_1]

//ME low -- OLS without spillovers
reg y_0_spillover x1errorlow county_mean_x1errorlow_1, cluster(county)
global me1_d2_b = _b[x1errorlow]
global me1_d2_se = _se[x1errorlow]
global me1_s2_b = _b[county_mean_x1errorlow_1]
global me1_s2_se = _se[county_mean_x1errorlow_1]

//ME medium -- OLS without spillovers
reg y_0_spillover x1errormedium county_mean_x1errormedium_1, cluster(county)
global me1_d3_b = _b[x1errormedium]
global me1_d3_se = _se[x1errormedium]
global me1_s3_b = _b[county_mean_x1errormedium_1]
global me1_s3_se = _se[county_mean_x1errormedium_1]

//ME high -- OLS without spillovers
reg y_0_spillover x1errorhigh county_mean_x1errorhigh_1, cluster(county)
global me1_d4_b = _b[x1errorhigh]
global me1_d4_se = _se[x1errorhigh]
global me1_s4_b = _b[county_mean_x1errorhigh_1]
global me1_s4_se = _se[county_mean_x1errorhigh_1]

//ME high -- IV without spillovers
ivreg y_0_spillover (x1errorhigh county_mean_x1errorhigh_1 = instrument county_mean_instrument_1), cluster(county)
global me1_d5_b = _b[x1errorhigh]
global me1_d5_se = _se[x1errorhigh]
global me1_s5_b = _b[county_mean_x1errorhigh_1]
global me1_s5_se = _se[county_mean_x1errorhigh_1]

//ME high -- OLS without spillovers at county-level
reg y_0_spillover county_mean_x1errorhigh, cluster(county)
global me1_t6_b = _b[county_mean_x1errorhigh]
global me1_t6_se = _se[county_mean_x1errorhigh]

//ME high -- IV without spillovers at county-level
ivreg y_0_spillover (county_mean_x1errorhigh = county_mean_instrument), cluster(county)
global me1_t7_b = _b[county_mean_x1errorhigh]
global me1_t7_se = _se[county_mean_x1errorhigh]


******REGRESSION ANALYSIS FOR MEASUREMENT ERROR -- TRUE SPILLOVER******
//ME -- systematic variation
reg y_1_spillover x1errorhigh county_mean_x1errorhigh_1, cluster(county)
global me2_d2_b = _b[x1errorhigh]
global me2_d2_se = _se[x1errorhigh]
global me2_s2_b = _b[county_mean_x1errorhigh_1]
global me2_s2_se = _se[county_mean_x1errorhigh_1]

//ME -- random variation
reg y_1_spillover_x0_cnty x0errorhigh county_mean_x0errorhigh_1, cluster(county)
global me2_d4_b = _b[x0errorhigh]
global me2_d4_se = _se[x0errorhigh]
global me2_s4_b = _b[county_mean_x0errorhigh_1]
global me2_s4_se = _se[county_mean_x0errorhigh_1]


******REGRESSION ANALYSIS FOR NON-LINEAR TREATMENT******
//OLS non-linear -- systematic variation
reg y_0_spillover_nl x1_nl county_mean_x1_nl_1, cluster(county)
global nl_d1_b = _b[x1_nl]
global nl_d1_se = _se[x1_nl]
global nl_s1_b = _b[county_mean_x1_nl_1]
global nl_s1_se = _se[county_mean_x1_nl_1]

//OLS linear -- systematic variation
reg y_0_spillover_nl x1 county_mean_x1_1, cluster(county)
global nl_d2_b = _b[x1]
global nl_d2_se = _se[x1]
global nl_s2_b = _b[county_mean_x1_1]
global nl_s2_se = _se[county_mean_x1_1]

//OLS non-linear squared -- systematic variation
reg y_0_spillover_nl2 x1_nl2 county_mean_x1_nl2_1, cluster(county)
global nl_d3_b = _b[x1_nl2]
global nl_d3_se = _se[x1_nl2]
global nl_s3_b = _b[county_mean_x1_nl2_1]
global nl_s3_se = _se[county_mean_x1_nl2_1]

//OLS linear squared -- systematic variation
reg y_0_spillover_nl2 x1 county_mean_x1_1, cluster(county)
global nl_d4_b = _b[x1]
global nl_d4_se = _se[x1]
global nl_s4_b = _b[county_mean_x1_1]
global nl_s4_se = _se[county_mean_x1_1]

//OLS non-linear -- random variation
reg y_0_spillover_nl_x0 x0_nl county_mean_x0_nl_1, cluster(county)
global nl_d5_b = _b[x0_nl]
global nl_d5_se = _se[x0_nl]
global nl_s5_b = _b[county_mean_x0_nl_1]
global nl_s5_se = _se[county_mean_x0_nl_1]

//OLS linear -- random variation
reg y_0_spillover_nl_x0 x0 county_mean_x0_1, cluster(county)
global nl_d6_b = _b[x0]
global nl_d6_se = _se[x0]
global nl_s6_b = _b[county_mean_x0_1]
global nl_s6_se = _se[county_mean_x0_1]

//IV linear -- random variation
ivreg y_0_spillover_nl (x1 county_mean_x1_1 = instrument county_mean_instrument_1), cluster(county)
global nl_d7_b = _b[x1]
global nl_d7_se = _se[x1]
global nl_s7_b = _b[county_mean_x1_1]
global nl_s7_se = _se[county_mean_x1_1]





******ADD ESTIMATES TO OUTPUT DATASET******
post buffer (`repetition') ///
///
(${ts_corr_b}) (${ts_corr_se}) ///
///
(${ts_d1_b}) (${ts_d1_se}) ///
(${ts_s1_b}) (${ts_s1_se}) ///
(${ts_d2_b}) (${ts_d2_se}) ///
(${ts_s2_b}) (${ts_s2_se}) ///
(${ts_d3_b}) (${ts_d3_se}) ///
(${ts_s3_b}) (${ts_s3_se}) ///
(${ts_a3_b}) (${ts_a3_se}) ///
(${ts_d4_b}) (${ts_d4_se}) ///
(${ts_s4_b}) (${ts_s4_se}) ///
(${ts_d5_b}) (${ts_d5_se}) ///
(${ts_s5_b}) (${ts_s5_se}) ///
(${ts_d6_b}) (${ts_d6_se}) ///
(${ts_s6_b}) (${ts_s6_se}) ///
(${ts_d7_b}) (${ts_d7_se}) ///
(${ts_s7_b}) (${ts_s7_se}) ///
(${ts_a7_b}) (${ts_a7_se}) ///
(${ts_d8_b}) (${ts_d8_se}) ///
(${ts_s8_b}) (${ts_s8_se}) ///
///
(${me1_d1_b}) (${me1_d1_se}) ///
(${me1_s1_b}) (${me1_s1_se}) ///
(${me1_d2_b}) (${me1_d2_se}) ///
(${me1_s2_b}) (${me1_s2_se}) ///
(${me1_d3_b}) (${me1_d3_se}) ///
(${me1_s3_b}) (${me1_s3_se}) ///
(${me1_d4_b}) (${me1_d4_se}) ///
(${me1_s4_b}) (${me1_s4_se}) ///
(${me1_d5_b}) (${me1_d5_se}) ///
(${me1_s5_b}) (${me1_s5_se}) ///
(${me1_t6_b}) (${me1_t6_se}) ///
(${me1_t7_b}) (${me1_t7_se}) ///
///
(${me2_d2_b}) (${me2_d2_se}) ///
(${me2_s2_b}) (${me2_s2_se}) ///
(${me2_d4_b}) (${me2_d4_se}) ///
(${me2_s4_b}) (${me2_s4_se}) ///
///
(${nl_d1_b}) (${nl_d1_se}) ///
(${nl_s1_b}) (${nl_s1_se}) ///
(${nl_d2_b}) (${nl_d2_se}) ///
(${nl_s2_b}) (${nl_s2_se}) ///
(${nl_d3_b}) (${nl_d3_se}) ///
(${nl_s3_b}) (${nl_s3_se}) ///
(${nl_d4_b}) (${nl_d4_se}) ///
(${nl_s4_b}) (${nl_s4_se}) ///
(${nl_d5_b}) (${nl_d5_se}) ///
(${nl_s5_b}) (${nl_s5_se}) ///
(${nl_d6_b}) (${nl_d6_se}) ///
(${nl_s6_b}) (${nl_s6_se}) ///
(${nl_d7_b}) (${nl_d7_se}) ///
(${nl_s7_b}) (${nl_s7_se}) ///

}
}


******ANALYZE OUTPUT DATASET******
postclose buffer
use output, clear
collapse *
format %9.3f *
compress
save output, replace